# Demo: PySpark RDD action operators (count / sum / reduce / take /
# collect / foreach / saveAsTextFile) on a small students dataset.

# 1. Create the Spark environment
import time

from pyspark.context import SparkContext

sc = SparkContext(master='local', appName='demo2_map')

# 2. Read the data (one student record per line, comma-separated;
# column index 2 is assumed to be the age — TODO confirm against the file)
students_rdd = sc.textFile("../../data/students.txt")

# Action operators trigger execution: each action below launches a job.

# Parse the age column once and cache the result so the two aggregations
# below do not each re-read and re-parse the source file.
ages_rdd = students_rdd.map(lambda line: int(line.split(",")[2])).cache()

# 1 count: number of lines in the RDD
line_count = students_rdd.count()
print(line_count)

# 2 sum: total of the age column
# (named age_sum instead of `sum` to avoid shadowing the builtin)
age_sum = ages_rdd.sum()
print(age_sum)

# 3 reduce: aggregate with a binary function (equivalent to sum() here;
# named age_total instead of `reduce` to avoid shadowing functools.reduce)
age_total = ages_rdd.reduce(lambda x, y: x + y)
print(age_total)

# 4 take: return the first n elements to the driver
top = students_rdd.take(100)
print(top)

# 5 collect: materialize the entire RDD as a Python list on the driver
# (fine for small demo data; avoid on large RDDs)
students_list = students_rdd.collect()
print(students_list)

# 6 foreach: apply a side-effecting function to each element on the executors
students_rdd.foreach(lambda x: print(x))
# the lambda wrapper is unnecessary — any callable can be passed directly
students_rdd.foreach(print)

# 7 save the RDD as text files
# NOTE: Spark raises an error if the target directory already exists;
# delete ../../data/save before re-running.
students_rdd.saveAsTextFile("../../data/save")

# Keep the driver alive so the Spark web UI (http://localhost:4040) stays
# inspectable. Sleep instead of a bare `while True: pass`, which would
# busy-wait and pin a CPU core; Ctrl+C shuts the context down cleanly.
try:
    while True:
        time.sleep(60)
except KeyboardInterrupt:
    sc.stop()